home *** CD-ROM | disk | FTP | other *** search
- /******************************************************/
- /* */
- /* Search.c */
- /* */
- /* Uses the Boyer-Moore search algorithm. Reference: */
- /* "A Fast String Searching Algorithm," Robert S. */
- /* Boyer and J Strother Moore, CACM v. 20 no. 10 */
- /* (October 1977), pp. 762-772. */
- /* Their original algorithm is modified here to do */
- /* case-insensitive searches. */
- /* */
- /* Written in Think C version 4.0.2 */
- /* */
- /* Allen Stenger January 1991 */
- /* */
- /******************************************************/
-
- #define DOCOUNT 0
- /* count number of passes in loops */
- #define DOTIMING 0 /* measure time of searches */
-
- #include <string.h>
- #include <ctype.h>
- #include "Browser.h"
- #include "Search.h"
-
- #define LARGE 100000000L
- /* LARGE is picked to be larger than any possible */
- /* file size. It is a flag in delta0 indicating */
- /* that the index character is in the pattern. */
-
- /* These are the Boyer-Moore tables, which tell how */
- /* far the pattern may be shifted for the next trial */
- /* comparison. delta0 is indexed by unsigned char, */
- /* and delta2 is indexed by position in the pattern. */
- static long delta0[256];
- static short delta2[256];
-
- char searchString[256] = {256 *'\0'};
- /* saves the search string */
-
- /******************************************************/
- /* */
- /* internal functions */
- /* */
- /******************************************************/
-
- /* prototypes for internal functions */
-
- /* Calculate the Boyer-Moore delta0 and delta2 tables.*/
- /* Tables are calculated assuming upper and lower case*/
- /* are the same. The search pattern is the global */
- /* variable searchString, and the tables are stored */
- /* in the global variables delta0 and delta2. */
-
- static void GetDelta0( void );
-
- static void GetDelta2( void );
-
- /* Boyer-Moore search method - returns TRUE if string */
- /* found, FALSE otherwise. */
-
- static Boolean /* returns whether found */
- BMSearch( char *string, /* target string */
- long stringlen,/* length of *string */
- long *offsetP); /* output - where found */
-
- /* end prototypes */
-
- static void GetDelta0( void )
- {
- short i; /* loop control */
- char *pat = searchString; /* local copy */
- long patlen = strlen(pat); /* local constant */
- for (i=0; i<256; i++) delta0[i] = patlen;
- for (i=0; i<patlen; i++) {
- delta0[ (unsigned char) tolower(searchString[i]) ] =
- patlen - 1 - i;
- delta0[ (unsigned char) toupper(searchString[i]) ] =
- patlen - 1 - i;
- }
- delta0[ (unsigned char)
- tolower(searchString[patlen-1]) ] = LARGE;
- delta0[ (unsigned char)
- toupper(searchString[patlen-1]) ] = LARGE;
- }
-
- static void GetDelta2( void )
- {
- #define SameCharAtPos(a, b) ( \
- (tolower(pat[a]) == tolower(pat[b])) \
- ||(toupper(pat[a]) == toupper(pat[b])) )
-
- short i; /* index in trial string for rpr */
- short j; /* rpr(j) now being calculated */
- short k; /* trial for rpr */
- short currentRPR; /* latest good rpr(j) */
- char *pat = searchString; /* local copy */
- short patlen = strlen(pat); /* local constant */
- Boolean unifies; /* TRUE if pat[j+1]..pat[patlen-1]*/
- /* unifies with */
- /* pat[k]..pat[k+patlen-j-2] */
-
-
-
- for (j=0; j<=patlen-1; j++) {
- /* Calculate rpr(j). NOTE: our rpr is one less */
- /* than the Boyer-Moore rpr, because our arrays */
- /* start at 0 and theirs at 1. The delta2 values */
- /* are the same, although indexed beginning at 0. */
- currentRPR = j + 1 - patlen;
- for (k=j+2-patlen; k<=j; k++) {
- /* check for reoccurrence at pat[k] */
- unifies = TRUE;
-
- for (i=0; i<=patlen-2-j; i++) {
- if ( (k+i>=0) && !SameCharAtPos(j+1+i,k+i))
- unifies = FALSE;
- }
- if ( unifies &&
- ( (k<1) || !SameCharAtPos(k-1,j) ) )
- currentRPR = k; /* found rightmore k */
- }
- delta2[j] = patlen - currentRPR;
- }
- }
-
- static Boolean
- BMSearch( register char *string,
- register long stringlen,
- long *offsetP)
- {
- register long
- i; /* index into string of current pointer */
- register short
- j; /* index into pat for char-by-char */
- char *pat = searchString; /* local copy */
- short patlen = strlen(pat); /* local constant */
- register long
- d0jump,
- d2jump; /* sizes of jumps indicated by */
- /* delta0, delta2 */
-
- #if DOCOUNT
- /* number of times through loops */
- long whileTRUECount = 0; /* while(TRUE) loop */
- long whileICount = 0; /* while(i) loop */
- long whileJCount = 0; /* while(j) loop */
- #endif
-
- i = patlen - 1;
- if (i >= stringlen) return( FALSE );
- while (TRUE) {
- #if DOCOUNT
- whileTRUECount++;
- #endif
-
- /* inner loop of Boyer-Moore algorithm */
- while( (i += delta0[ (unsigned long)
- (unsigned char) string[i] ])
- < stringlen ) {
- #if DOCOUNT
- whileICount++;
- #endif
- };
-
- if (i<LARGE) return( FALSE );
- i -= (LARGE + 1);
- j = patlen - 2;
-
- /* character-by-character comparison */
- while ( (j>=0) &&
- (tolower(string[i]) == tolower(pat[j])) ) {
- #if DOCOUNT
- whileJCount++;
- #endif
- --j;
- --i;
- }
- if (j < 0) {
- /* success - whole pattern matched */
- *offsetP = i + 1;
- return( TRUE );
- }
- /* failure - only part of pattern matched - get */
- /* shifts indicated by delta0 (single-character */
- /* mismatch) and by delta2 (next plausible */
- /* reoccurrence) and take the larger. */
- d0jump = delta0[ (unsigned long)
- (unsigned char) string[i] ];
- if (d0jump == LARGE) d0jump = 0;
- d2jump = delta2[j];
- i += (d0jump > d2jump) ? d0jump : d2jump;
- }
- }
-
- /******************************************************/
- /* */
- /* External functions */
- /* */
- /******************************************************/
-
- Boolean GoFind( Handle theTextH,
- long *offsetP)
- {
- long newOffset; /* where match found */
- Boolean matchFound; /* whether match found */
- #if DOTIMING
- long startTime, endTime;
- float elapsedTime; /* time for GoFind (seconds) */
- #endif
-
- #if DOTIMING
- startTime = TickCount();
- #endif
-
- /* note - BMSearch always starts from the beginning */
- /* of the string, so we have to offset the text and */
- /* add relative offsets */
-
- matchFound = BMSearch(
- *theTextH + *offsetP,
- GetHandleSize(theTextH) - *offsetP,
- &newOffset );
- if (matchFound) *offsetP += newOffset;
-
- #if DOTIMING
- endTime = TickCount();
- elapsedTime = (endTime - startTime) / 60.0;
- #endif
-
- return (matchFound);
- }
-
- Boolean GetSearchString( void )
- {
- short itemHit; /* which item in dialog selected */
- DialogPtr theDialogP; /* pointer to modal dialog */
- short itemType; /* for GetDItem */
- Handle itemHandle; /* for GetDItem */
- Rect box; /* for GetDItem */
-
- theDialogP = GetNewDialog(dlogSearch, NULL, -1L);
- CtoPstr(searchString);
- GetDItem(theDialogP, dlogText, &itemType,
- &itemHandle, &box);
- SetIText(itemHandle, searchString);
- SelIText(theDialogP, dlogText, 0, 32767);
- itemHit = dlogText;
- while ( !( (itemHit == dlogOK) ||
- (itemHit == dlogCancel)) )
- ModalDialog(NULL, &itemHit);
- if (itemHit == dlogOK) {
- GetDItem(theDialogP, dlogText, &itemType,
- &itemHandle, &box);
- GetIText(itemHandle, searchString);
- }
- PtoCstr(searchString);
- /* convert back to C whether entered or not */
- DisposDialog(theDialogP);
-
- if (strlen(searchString) != 0) {
- GetDelta0();
- GetDelta2();
- }
- return( (itemHit == dlogOK) &&
- (strlen(searchString) != 0) );
- }
-
- Boolean HaveSearchString( void )
- {
- return (strlen(searchString) != 0);
- }
-